{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import random\n",
    "from collections import Counter\n",
    "import binascii"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "src_before = '''While getting his masters degree, a professor gave his students the option of solving a difficult problem instead of taking the final exam. Opting for what he thought was the easy way out, my uncle tried to find a solution to the \"smallest code\" problem. What his professor didn't tell him is that no one at that time knew the best solution. As the term drew to a close, David realized he'd have to start studying for the exam and starting throwing away his scratchings on the problem. As one of the papers hit the trash can, the algorithm came to him.'''\n",
    "src_flag = ' flag{w0w_congrats_th1s_1s_rea11y_huffman} '\n",
    "src_after = '''He published the paper \"A Method for the Construction of Minimum Redundancy Codes\" describing his algorithm in 1952. This became known as Huffman Coding. At the time he didn't consider copyrighting or patenting it, because was just an algorithm, and he didn't make a penny off of it. Because of its elegance and simplicity, it is described in many textbooks and several web pages. Today derivative forms of Huffman Coding can found in common electronics and web pages (for example, the Jpeg image file format).'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2210"
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "src = binascii.hexlify((src_before+src_flag+src_after).lower().encode()).decode()\n",
    "len(src)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "cnt = Counter()\n",
    "for c in src:\n",
    "    cnt[c]+=1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('6', 649),\n",
       " ('7', 311),\n",
       " ('2', 274),\n",
       " ('0', 208),\n",
       " ('4', 124),\n",
       " ('5', 119),\n",
       " ('3', 94),\n",
       " ('9', 81),\n",
       " ('1', 77),\n",
       " ('e', 70),\n",
       " ('f', 69),\n",
       " ('8', 55),\n",
       " ('c', 35),\n",
       " ('d', 35),\n",
       " ('b', 6),\n",
       " ('a', 3)]"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cnt.most_common()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "huffs = [(v,k) for k,v in cnt.items()] # cnt, char\n",
    "\n",
    "while len(huffs)>1:\n",
    "    random.shuffle(huffs)\n",
    "    huffs.sort(key=lambda x:x[0])\n",
    "    c1,t1 = huffs[0]\n",
    "    c2,t2 = huffs[1]\n",
    "    huffs = huffs[2:]\n",
    "    huffs.append((c1+c2, [t1, t2]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "codecs = []\n",
    "def dfs(tree, pfx):\n",
    "    if isinstance(tree, str):\n",
    "        codecs.append((tree, pfx))\n",
    "    else:\n",
    "        dfs(tree[0], pfx+'0')\n",
    "        dfs(tree[1], pfx+'1')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfs(huffs[0][1], '')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('0', '000'),\n",
       " ('3', '0010'),\n",
       " ('5', '0011'),\n",
       " ('4', '0100'),\n",
       " ('8', '01010'),\n",
       " ('f', '01011'),\n",
       " ('2', '011'),\n",
       " ('e', '10000'),\n",
       " ('1', '10001'),\n",
       " ('d', '100100'),\n",
       " ('a', '10010100'),\n",
       " ('b', '10010101'),\n",
       " ('c', '1001011'),\n",
       " ('9', '10011'),\n",
       " ('7', '101'),\n",
       " ('6', '11')]"
      ]
     },
     "execution_count": 76,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "codecs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'e2924716d627f6660256c6966602567616d69602765607a60256864702c256c607d61687560227f66682023756761607022656770246e61602373696e6f627473656c65602e6f6d6d6f63602e6960246e657f66602e616360276e69646f63602e616d6666657860266f60237d627f666025667964716679627564602971646f64702e237567616070226567702c61627566756370246e6160237b6f6f626478756470297e616d602e6960246562696273637564602379602479602c29747963696c607d696370246e616025636e6167656c656023747960266f6025637571636562602e247960266f6026666f60297e6e6560702160256b616d6024772e64696460256860246e61602c2d686479627f676c61602e61602473757a602371677025637571636562602c247960276e69647e6564716070227f60276e69647867696279707f636022756469637e6f636024772e64696460256860256d696470256864702471602e276e69646f63602e616d66666578602371602e677f6e6b60256d616365626023796864702e22353931302e69602d686479627f676c616023796860276e696269627363756460222375646f636029736e61646e65746562702d657d696e696d60266f602e6f696473657274737e6f636025686470227f6660246f6864756d602162202275607160702568647024656863796c626570702568602d7e616d666665786f5971313165627f53713f537138647f53747162776e6f636f5770377b77616c66602e2d6968602f6470256d6163602d686479627f676c6160256864702c2e61636028637162747025686470247968602372756071607025686470266f60256e6f602371602e2d656c626f627070256864702e6f6023776e696863647162736370237968602971677160276e69677f62786470276e69647271647370246e61602d6168756025686470227f6660276e696974657473702472716473702f64702566716860246725686024656a796c616562702469667164602c25637f6c636021602f647027756274602d62756470256864702371602e2e6f6964757c6f637024737562602568647027756e6b60256d6964702471686470247160256e6f602f6e6024716864702379602d6968602c6c65647024772e64696460227f637375666f6270702379686024716867702e2d656c626f6270702225646f63602473756c6c616d637220256864702f64702e6f6964757c6f63702160246e6966602f6470246569627470256c636e6570297d602c24757f602971677029737165602568647023716770247867657f6864702568602471686770227f6660276e6964707f602e2d616875602c616e69666025686470276e696b616470266f6024616564737e69602d656c626f627070247c65736966666964602160276e69667c6f6370266f602e6f6964707f602568647023747e656465747370237968602566716760227f637375666f62707021602c256562776564602372756473716d6023796860276e696474756760256c6968677'"
      ]
     },
     "execution_count": 79,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "src[::-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('table.pickle', 'wb') as f:\n",
    "    pickle.dump(codecs, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('text.txt', 'w') as f:\n",
    "    f.write(src)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [],
   "source": [
    "codecs_map = {k:v for k,v in codecs}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "lines = []\n",
    "lenmap = {} # char: len\n",
    "lines_set = set()\n",
    "for c in src:\n",
    "    if c not in lines_set:\n",
    "        lines.append((c, cnt[c], len(codecs_map[c])))\n",
    "        lenmap[c] = len(codecs_map[c])\n",
    "        lines_set.add(c)\n",
    "\n",
    "lines.sort(key=lambda x: (x[1], -x[2]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('a', 3, 8),\n",
       " ('b', 6, 8),\n",
       " ('c', 35, 7),\n",
       " ('d', 35, 6),\n",
       " ('8', 55, 5),\n",
       " ('f', 69, 5),\n",
       " ('e', 70, 5),\n",
       " ('1', 77, 5),\n",
       " ('9', 81, 5),\n",
       " ('3', 94, 4),\n",
       " ('5', 119, 4),\n",
       " ('4', 124, 4),\n",
       " ('0', 208, 3),\n",
       " ('2', 274, 3),\n",
       " ('7', 311, 3),\n",
       " ('6', 649, 2)]"
      ]
     },
     "execution_count": 84,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lines"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
